********************************************************************************
*                                                                              *
*                           BANDWIDTH OPTIMISATION                             *
*                                                                              *
********************************************************************************

cd "U:\"  // open directory
*use "\NVM_20002014_full.dta", clear
use "pc4regression.dta", clear

global sharp = 0            // Allow for optimisation of bandwidth
global fuzzy = 1             // Allow for optimisation of bandwidth
global min = 1               // Set minimum value for third-order terms
local minvalue = 0.0001      // Set minimum value for 

set matsize 1000



********************************************************************************
*                           House price analyses                               *
********************************************************************************

* PC4 regressions
global dependent dmshwownocc
global instrument dmscorerule
global treatment dmkw
global house 
global neighbourhood 
global adjustment
global year dmyear_*
global score zscore_* 

/*
* House prices
global dependent dmlogpricesqm
global dependent dmlogdaysonmarket
*global dependent dpercask
global instrument dmscorerule
global treatment dmkw
global house dmlogsize dmrooms dmmaintgood dmcentralheating dmlisted 
global neighbourhood //dmlogincome dmlogpopdens dmshforeign dmshyoung dmshold dmhhsize dmluinfr dmluind dmluopens dmluwater
global adjustment dmdaysinv //dmdaysinv1 dmdaysinv2 dmdaysinv3 dmdaysinv4 dmdaysinv5
global year dmyear_*
global score zscore_* 
*/

/*
* In levels
drop if (year < $tyear | (year == $tyear & month < $tmonth) | (year == $tyear & month == $tmonth & day <= $tday))
g kwexcl = ((zscore>=7.3 & inkw == 0) | (zscore<7.3 & inkw == 1))
global dependent logpricesqm
*global dependent logdaysonmarket
*global dependent dpercask
global treatment kw
global instrument scorerule
global house logsize rooms maintgood centralheating listed terraced semidetached detached garage garden constr19451959 constr19601970 constr19711980 constr19811990 constr19912000 constrgt2000
global neighbourhood logincome logpopdens shforeign shyoung shold hhsize luinfr luind luopens luwater 
global adjustment //daysinv //dmdaysinv1 dmdaysinv2 dmdaysinv3 dmdaysinv4 dmdaysinv5
global year year_*
global score zscore_* 
*/

quietly local c = 7.30 // Cut-off
quietly global dT = 2.5 // Cut-off distance
quietly tostring pc4, g(pc2)
quietly replace pc2 = substr(pc2,1,3)
quietly destring pc2, force replace 
quietly g weight = .
quietly global fe pc4
quietly global se pc4
quietly xtset $fe

/// Step 0: Keep relevant observations
*quietly keep if (inkw == 1 | kwdist > $dT)
quietly drop if kwadjacent == 1
quietly drop if $dependent == .


/*
********************************************************************************
*                 Neighbourhood analyses (including balancing tests)           *
********************************************************************************

global dependent hhsize
global treatment inkw
global instrument inscorerule
global house 
global neighbourhood
global adjustment
global year year_*
global score zscore_* 

quietly local c = 7.30 // Cut-off
quietly global dT = 2.5 // Cut-off distance
quietly tostring pc4, g(pc2)
quietly replace pc2 = substr(pc2,1,3)
quietly destring pc2, force replace 
quietly g weight = .
quietly global fe pc4
quietly global se pc4
quietly xtset $fe

/// Step 0: Keep relevant observations
quietly drop if kwadjacent == 1
quietly drop if $dependent == .
quietly drop if year >= 2007

********************************************************************************
*/

quietly local N = _N // Number of observations


quietly if $sharp == 1 {
	
	drop if kwexcl == 1
	
	/******************************************************************************/
	/*Step 1: Estimation of density f(c) and conditional variance sigma-squared(c)*/
	/******************************************************************************/

	/*Calculating the pilot bandwidth h1*/
	summarize zscore, detail
	local Sx = r(sd)
	local N = r(N)
	local h1 = 1.84*`Sx'*(`N'^(-1/5))

	/* Calculating the number of units on either side of the threshold and the average outcomes*/
	count if (`c'-`h1')<=zscore & zscore<`c'
	local Nh1n = r(N)

	count if `c'<=zscore & zscore<=(`c'+`h1')
	local Nh1p = r(N)

	ameans $dependent if (`c'-`h1')<=zscore & zscore<`c'
	local Yh1n = r(mean)

	ameans $dependent if `c'<=zscore & zscore<=(`c'+`h1')
	local Yh1p = r(mean)

	/* Estimating the density of X at c and the conditional variance of Y given X_i=c at x=c */
	local fxc = (`Nh1p'+`Nh1n')/(2*`N'*`h1')

	generate n$dependent =($dependent-`Yh1n')^2 
	summarize n$dependent if (`c'-`h1')<=zscore & zscore<`c', detail
	local Y1sum =r(sum)

	generate p$dependent =($dependent-`Yh1p')^2 
	summarize p$dependent if `c'<=zscore & zscore<=(`c'+`h1'), detail
	local Y2sum =r(sum)

	local sigma2c = (1/(`Nh1p'+`Nh1n'))*(`Y1sum'+`Y2sum')


	/******************************************************************************/
	/*                Step 2: Estimation of second derivatives                    */
	/******************************************************************************/

	/*Calculating the pilot bandwidth h2*/

	summarize zscore if zscore>=`c', detail
	scalar medXp = r(p50)

	summarize zscore if zscore<`c', detail
	scalar medXn = r(p50)

	regress $dependent $treatment zscore_1 zscore_2 zscore_3 $house $neighbourhood $adjustment $year if zscore >= medXn & zscore <= medXp

	local m3c=6*_b[zscore_3] 

	count if zscore>=`c'
	local Np=r(N)

	count if zscore<`c'
	local Nn=r(N)

	if $min == 1 {
		local h2p=3.56*((`sigma2c'/(`fxc'*max((`m3c')^2, `minvalue')))^(1/7))*(`Np'^(-1/7))
		local h2n=3.56*((`sigma2c'/(`fxc'*max((`m3c')^2, `minvalue')))^(1/7))*(`Nn'^(-1/7))
	}
	else {
		local h2p=3.56*((`sigma2c'/(`fxc'*(`m3c')^2))^(1/7))*(`Np'^(-1/7))
		local h2n=3.56*((`sigma2c'/(`fxc'*(`m3c')^2))^(1/7))*(`Nn'^(-1/7))
	}
	
	/*Calculating the estimates of the second derivatives*/
	count if `c'<=zscore & zscore<=(`c'+`h2p')
	local N2p=r(N)

	count if (`c'-`h2n')<=zscore & zscore<`c'
	local N2n=r(N)

	generate constant = 1
	regress $dependent constant zscore_1 zscore_2  $house $neighbourhood $adjustment $year if `c'<=zscore & zscore<=(`c'+`h2p')
	local m2pc=2*_b[zscore_2]
	regress $dependent constant zscore_1 zscore_2  $house $neighbourhood $adjustment $year if (`c'-`h2n')<=zscore & zscore<`c'
	local m2nc=2*_b[zscore_2] 

	/**************************************************************************************/
	/*Step 3: Calculation of regularization terms and calculation of the optimal bandwidth*/
	**************************************************************************************/
	
	/*Calculating the regularization terms*/
	local rp=(720*`sigma2c')/(`N2p'*((`h2p')^4))
	local rn=(720*`sigma2c')/(`N2n'*((`h2n')^4))

	/*Calculating the optimal bandwidth*/

	local CK = 5.4
	local h_sharp= `CK'*(((2*`sigma2c')/(`fxc'*(((`m2pc'-`m2nc')^2)+(`rp'+`rn'))))^(1/5))*`N'^(-1/5)

	regress $dependent $treatment $house $adjustment $neighbourhood $year constant if zscore > (`c'-`h_sharp') & zscore < (`c'+`h_sharp'), cluster($se)
	local beta_sharp = _b[$treatment]
	local sdbeta_sharp = _se[$treatment]
	local tbeta_sharp = abs(`beta_sharp'/`sdbeta_sharp')

}

quietly if $fuzzy == 1 {

	/**********************************************************************************/
	/* Step 1.1: Estimation of density f(c) and conditional variance sigma-squared(c) */
	/**********************************************************************************/

	/*Calculating the pilot bandwidth h1*/
	quietly summarize zscore, detail
	local Sx = r(sd)
	local N = r(N)
	local h1 = 1.84*`Sx'*(`N'^(-1/5))

	/* Calculating the number of units on either side of the threshold and the average outcomes*/
	quietly count if (`c'-`h1')<=zscore & zscore<`c'
	local Nh1n = r(N)

	quietly count if `c'<=zscore & zscore<=(`c'+`h1')
	local Nh1p = r(N)

	quietly ameans $treatment if (`c'-`h1')<=zscore & zscore<`c'
	local Wh1n = r(mean)

	quietly ameans $treatment if `c'<=zscore & zscore<=(`c'+`h1')
	local Wh1p = r(mean)

	/* Estimating the density of X at c and the conditional variance of Y given X_i=c at x=c */
	local fxc = (`Nh1p'+`Nh1n')/(2*`N'*`h1')

	generate n$treatment =($treatment-`Wh1n')^2 
	quietly summarize n$treatment if (`c'-`h1')<=zscore & zscore<`c', detail
	local W1sum =r(sum)

	generate p$treatment =($treatment-`Wh1p')^2 
	quietly summarize p$treatment if `c'<=zscore & zscore<=(`c'+`h1'), detail
	local W2sum =r(sum)

	local Wsigma2c = (1/(`Nh1p'+`Nh1n'))*(`W1sum'+`W2sum')


	/******************************************************************************/
	/*                Step 1.1: Estimation of second derivatives                  */
	/******************************************************************************/

	/*Calculating the pilot bandwidth h2*/

	quietly summarize zscore if zscore>=`c', detail
	scalar medXp = r(p50)

	quietly summarize zscore if zscore<`c', detail
	scalar medXn = r(p50)

	quietly regress $treatment $treatment zscore_1 zscore_2 zscore_3 $house $neighbourhood $adjustment $year if zscore >= medXn & zscore <= medXp

	local m3c=6*_b[zscore_3] 

	quietly count if zscore>=`c'
	local Np=r(N)

	quietly count if zscore<`c'
	local Nn=r(N)
	
	if $min == 1 {
		local h2p=3.56*((`Wsigma2c'/(`fxc'*max((`m3c')^2, `minvalue')))^(1/7))*(`Np'^(-1/7))
		local h2n=3.56*((`Wsigma2c'/(`fxc'*max((`m3c')^2, `minvalue')))^(1/7))*(`Nn'^(-1/7))
	}
	else {	
		local h2p=3.56*((`Wsigma2c'/(`fxc'*(`m3c')^2))^(1/7))*(`Np'^(-1/7))
		local h2n=3.56*((`Wsigma2c'/(`fxc'*(`m3c')^2))^(1/7))*(`Nn'^(-1/7))
	}
	
	/*Calculating the estimates of the second derivatives*/
	quietly count if `c'<=zscore & zscore<=(`c'+`h2p')
	local N2p=r(N)

	count if (`c'-`h2n')<=zscore & zscore<`c'
	local N2n=r(N)

	generate constant = 1
	regress $treatment constant zscore_1 zscore_2 $house $neighbourhood $adjustment $year if `c'<=zscore & zscore<=(`c'+`h2p')
	local Wm2pc=2*_b[zscore_2]
	regress $treatment constant zscore_1 zscore_2 $house $neighbourhood $adjustment $year if (`c'-`h2n')<=zscore & zscore<`c'
	local Wm2nc=2*_b[zscore_2] 

	/********************************************************************************************/
	/* Step 1.3: Calculation of regularization terms and calculation of the 1st stage bandwidth */
	/********************************************************************************************/

	/*Calculating the regularization terms*/
	local Wrp=(720*`Wsigma2c')/(`N2p'*((`h2p')^4))
	local Wrn=(720*`Wsigma2c')/(`N2n'*((`h2n')^4))

	/*Calculating the optimal bandwidth*/

	local CK = 5.4
	local hfs= `CK'*(((2*`Wsigma2c')/(`fxc'*(((`Wm2pc'-`Wm2nc')^2)+(`Wrp'+`Wrn'))))^(1/5))*`N'^(-1/5)

	disp `hfs'

	regress $treatment $instrument $house $neighbourhood $adjustment $year constant if zscore > (`c'-`hfs') & zscore < (`c'+`hfs')
	predict pred$treatment, xb
	
	/**********************************************************************************/
	/* Step 2.1: Estimation of density f(c) and conditional variance sigma-squared(c) */
	/**********************************************************************************/

	/*Calculating the pilot bandwidth h1*/
	summarize zscore, detail
	local Sx = r(sd)
	local N = r(N)
	local h1 = 1.84*`Sx'*(`N'^(-1/5))

	/* Calculating the number of units on either side of the threshold and the average outcomes*/
	count if (`c'-`h1')<=zscore & zscore<`c'
	local Nh1n = r(N)

	count if `c'<=zscore & zscore<=(`c'+`h1')
	local Nh1p = r(N)

	ameans $dependent if (`c'-`h1')<=zscore & zscore<`c'
	local Yh1n = r(mean)

	ameans $dependent if `c'<=zscore & zscore<=(`c'+`h1')
	local Yh1p = r(mean)

	/* Estimating the density of X at c and the conditional variance of Y given X_i=c at x=c */
	local fxc = (`Nh1p'+`Nh1n')/(2*`N'*`h1')

	generate n$dependent =($dependent-`Yh1n')^2 
	summarize n$dependent if (`c'-`h1')<=zscore & zscore<`c', detail
	local Y1sum =r(sum)

	generate p$dependent =($dependent-`Yh1p')^2 
	summarize p$dependent if `c'<=zscore & zscore<=(`c'+`h1'), detail
	local Y2sum =r(sum)

	local Ysigma2c = (1/(`Nh1p'+`Nh1n'))*(`Y1sum'+`Y2sum')
	
	/* Estimate covariances of Y and W at x=c */
	
	corr $dependent $treatment if (`c'-`h1')<=zscore & zscore<`c', covariance
	local YWn = r(cov_12)
	corr $dependent $treatment if `c'<=zscore & zscore<=(`c'+`h1'), covariance
	local YWp = r(cov_12)
	
	local YWsigma2c = `YWn'+`YWp'
	
	
	/******************************************************************************/
	/*                Step 2.2: Estimation of second derivatives                  */
	/******************************************************************************/

	/*Calculating the pilot bandwidth h2*/

	summarize zscore if zscore>=`c', detail
	scalar medXp = r(p50)

	summarize zscore if zscore<`c', detail
	scalar medXn = r(p50)

	regress $dependent $treatment zscore_1 zscore_2 zscore_3 $house $neighbourhood $adjustment $year if zscore >= medXn & zscore <= medXp

	local m3c=6*_b[zscore_3] 

	count if zscore>=`c'
	local Np=r(N)

	count if zscore<`c'
	local Nn=r(N)
	
	if $min == 1 {
		local h2p=3.56*((`Ysigma2c'/(`fxc'*max((`m3c')^2, `minvalue')))^(1/7))*(`Np'^(-1/7))
		local h2n=3.56*((`Ysigma2c'/(`fxc'*max((`m3c')^2, `minvalue')))^(1/7))*(`Nn'^(-1/7))
	}
	else {
		local h2p=3.56*((`Ysigma2c'/(`fxc'*(`m3c')^2))^(1/7))*(`Np'^(-1/7))
		local h2n=3.56*((`Ysigma2c'/(`fxc'*(`m3c')^2))^(1/7))*(`Nn'^(-1/7))
	}
	
	/*Calculating the estimates of the second derivatives*/
	quietly count if `c'<=zscore & zscore<=(`c'+`h2p')
	local N2p=r(N)

	quietly count if (`c'-`h2n')<=zscore & zscore<`c'
	local N2n=r(N)

	regress $dependent constant zscore_1 zscore_2 $house $neighbourhood $adjustment $year if `c'<=zscore & zscore<=(`c'+`h2p')
	local Ym2pc=2*_b[zscore_2]
	regress $dependent constant zscore_1 zscore_2 $house $neighbourhood $adjustment $year if (`c'-`h2n')<=zscore & zscore<`c'
	local Ym2nc=2*_b[zscore_2] 

	/********************************************************************************************/
	/* Step 2.3: Calculation of regularization terms and calculation of the 2nd stage bandwidth */
	/********************************************************************************************/

	/*Calculating the regularization terms*/
	local Yrp=(720*`Ysigma2c')/(`N2p'*((`h2p')^4))
	local Yrn=(720*`Ysigma2c')/(`N2n'*((`h2n')^4))

	/*Calculating the optimal bandwidth*/

	local CK = 5.4
	local hss = `CK'*(((2*`Ysigma2c')/(`fxc'*(((`Ym2pc'-`Ym2nc')^2)+(`Yrp'+`Yrn'))))^(1/5))*`N'^(-1/5)

	regress $dependent pred$treatment $house $neighbourhood $adjustment $year constant if zscore > (`c'-`hss') & zscore < (`c'+`hss')
	local tfrd = _b[pred$treatment]	
	
	/******************************************************************************************/
	/*             Step 3: Calculation of the optimal bandwidth and the effect                */
	/******************************************************************************************/
	
	local h_fuzzy = `CK'*(((2*`Ysigma2c'+`tfrd'^2*2*`Wsigma2c'-2*`tfrd'*`YWsigma2c')/(`fxc'*(((`Ym2pc'-`Ym2nc')-`tfrd'*(`Wm2pc'-`Wm2nc'))^2+(`Yrp'+`Yrn')+`tfrd'*(`Wrp'+`Wrn'))))^( 1/5))*`N'^(-1/5)

	ivregress 2sls $dependent ($treatment = $instrument) $house $neighbourhood $adjustment $year constant if zscore > (`c'-`h_fuzzy') & zscore < (`c'+`h_fuzzy'), cluster($se)
	local beta_fuzzy = _b[$treatment]
	local sdbeta_fuzzy = _se[$treatment]
	local tbeta_fuzzy = abs(`beta_fuzzy'/`sdbeta_fuzzy')
	
	}
	
disp "beta =" `beta_sharp' " sd beta" `sdbeta_sharp' " T-stat beta = " `tbeta_sharp' " bandwidth SRD = " `h_sharp'
disp "beta =" `beta_fuzzy' " sd beta" `sdbeta_fuzzy' " T-stat beta = " `tbeta_fuzzy' " bandwidth FRD = " `h_fuzzy'

